suppressWarnings(library("knitr"))
suppressWarnings(library("gplots"))
suppressWarnings(library("igraph"))
opts_chunk$set(tidy.opts=list(width.cutoff=80),tidy=TRUE,dev="png",dpi=150)
load_obj <- function(file.path){
temp.space <- new.env()
obj<-load(file.path, temp.space)
obj2<-get(obj, temp.space)
rm(temp.space)
return(obj2)
}
maxScl <- function(df, dir='row', max_value=NULL, log_space=TRUE){
if(dir=='row'){
dir=1
}else if(dir=='col'){
dir=2
}else{
print("dir must be 'row' or 'col'.")
return
}
if(is.null(max_value)){
max_value=median(apply(df,dir,max))
}
if(log_space){
df=expm1(df)
max_value=expm1(max_value)
}
df_scl=sweep(df,dir,apply(df,dir,max),"/")
df_scl=df_scl*max_value
if(log_space){
df_scl=log1p(df_scl)
}
return(df_scl)
}
module.match <- function(NMF_list, ref_rep='rep0', min.cor=0.5){
NMF_Ms=list()
NMF_Gs=list()
NMF_Gs[[ref_rep]]=maxScl(NMF_list[[ref_rep]][["G"]],dir = 'col',max_value=1,log_space = F)
num_M=dim(NMF_Gs[[ref_rep]])[2]
num_gen=dim(NMF_Gs[[ref_rep]])[1]
for(rep in setdiff(names(NMF_list),ref_rep)){
##match modules across different replicates
n_G=NMF_list[[rep]][['G']]
n_G=maxScl(n_G,dir = 'col',max_value=1,log_space = F)
NMF_Gs[[rep]]=n_G
match_ind=c()
for(i in 1:num_M){
vec=NMF_Gs[[ref_rep]][,i]
vec_ind=which(vec>0.1)
G_ind=which(apply(n_G,1,max)>0.1)
use_ind=union(vec_ind,G_ind)
cors=cor(vec[use_ind],n_G[use_ind,])
cor_ind=which.max(cors)
if(max(cors) < min.cor){ ##SHOULD RECORD THE CORRELATION SCORES AND SEE IF THERE ARE AMBIGUOUS MATCHES
match_ind=c(match_ind,NaN)
#print(paste(stage,"rep0 module",i-1,"has no match in",rep))
NMF_Gs[[rep]][,i]=NaN
}else{
NMF_Gs[[rep]][,i]=n_G[,cor_ind]
match_ind=c(match_ind,cor_ind)
}
}
if(num_M!=length(unique(match_ind))){
print(paste(stage,rep," modules with duplicated matches:"))
for (i in match_ind[which(duplicated(match_ind))]){
if(!is.na(i)){
print(paste(toString(which(match_ind==i)-1), "from rep0 matched to",i-1))
}
}
print(paste("modules with no match",toString(setdiff(c(1:num_M),unique(match_ind))-1)))
}
#NMF_Gs[[paste0("DS",stage)]][[rep]]=maxScl(NMF_Gs[[paste0("DS",stage)]][[rep]],dir = 'col')
#NMF_tops[[rep]]=NMF_list[[rep]][["top30genes"]]
}
num_reps=length(names(NMF_list))
for(m in colnames(NMF_Gs[[ref_rep]])){
NMF_Ms[[m]]=matrix(nrow=num_gen,ncol = num_reps)
rownames(NMF_Ms[[m]])=rownames(NMF_Gs[[ref_rep]])
colnames(NMF_Ms[[m]])=paste0("rep",c(0:(num_reps-1)))
for(rep in names(NMF_list)){
NMF_Ms[[m]][,rep]=NMF_Gs[[rep]][,m]
}
max_w=apply(NMF_Ms[[m]],1, function(x) max(x,na.rm = T))
#min_w=apply(NMF_Ms[[paste0("DS",stage)]][[paste0("Module",m)]],1,min)
NMF_Ms[[m]]=NMF_Ms[[m]][-which(max_w<0.1), ,drop=F]
re_ind=order(NMF_Ms[[m]][,ref_rep],decreasing = T)
NMF_Ms[[m]]=NMF_Ms[[m]][re_ind,]
#print(paste("Module",m,"# kept genes:",dim(NMF_Ms[[paste0("DS",stage)]][[paste0("Module",m)]])[1]))
}
return(NMF_Ms)
}
A best K (number of modules or n_component argument used for running NMF) is picked for each stage based on the stability of the results from 10 NMF runs with random initial conditions.
ZFHIGH_k=c(10)
ZFOBLONG_k=c(11)
ZFDOME_k=c(17)
ZF30_k=c(15)
ZF50_k=c(20)
ZFS_k=c(25)
ZF60_k=c(25)
ZF75_k=c(24)
ZF90_k=c(45)
ZFB_k=c(40)
ZF3S_k=c(31)
ZF6S_k=c(42)
stages=c("ZFHIGH","ZFOBLONG","ZFDOME","ZF30","ZF50","ZFS","ZF60","ZF75","ZF90","ZFB","ZF3S","ZF6S")
zf_C<-list()
zf_G<-list()
zf_top <- list()
zf_genes=c()
NMF_list=list()
module_match=list()
for(stage in stages){
stage_k=get(paste0(stage,"_k"))[1]
NMF_obj=load_obj(paste0("./Results/DS_stages/DS_",stage,"/result_tbls.Robj"))
NMF_list[[stage]]=NMF_obj[[paste0("K=",stage_k)]][["rep0"]]
zf_C[[stage]]=data.frame(NMF_list[[stage]][["C"]], stringsAsFactors = F)
zf_G[[stage]]=data.frame(NMF_list[[stage]][["G"]], stringsAsFactors = F)
colnames(zf_G[[stage]])=rownames(zf_C[[stage]])
zf_genes=c(zf_genes, rownames(zf_G[[stage]]))
zf_top[[stage]]=data.frame(NMF_list[[stage]][["top30genes"]], stringsAsFactors = F)
module_match[[stage]]=module.match(NMF_obj[[paste0("K=",stage_k)]], ref_rep='rep0',min.cor=0.5)
}
[1] "ZFHIGH rep9 modules with duplicated matches:"
[1] "0, 9 from rep0 matched to 0"
[1] "modules with no match 8"
[1] "ZFOBLONG rep1 modules with duplicated matches:"
[1] "4, 8 from rep0 matched to 8"
[1] "modules with no match 10"
[1] "ZFDOME rep1 modules with duplicated matches:"
[1] "2, 5 from rep0 matched to 2"
[1] "modules with no match 3, 14"
[1] "ZFDOME rep2 modules with duplicated matches:"
[1] "2, 5 from rep0 matched to 5"
[1] "11, 12 from rep0 matched to 13"
[1] "modules with no match 2, 14"
[1] "ZFDOME rep3 modules with duplicated matches:"
[1] "5, 14 from rep0 matched to 13"
[1] "modules with no match 2"
[1] "ZFDOME rep4 modules with duplicated matches:"
[1] "2, 5 from rep0 matched to 5"
[1] "modules with no match 2, 14"
[1] "ZFDOME rep5 modules with duplicated matches:"
[1] "5, 14 from rep0 matched to 5"
[1] "modules with no match 14"
[1] "ZFDOME rep7 modules with duplicated matches:"
[1] "5, 14 from rep0 matched to 5"
[1] "modules with no match 16"
[1] "ZFDOME rep8 modules with duplicated matches:"
[1] "5, 14 from rep0 matched to 15"
[1] "modules with no match 2"
[1] "ZF50 rep1 modules with duplicated matches:"
[1] "5, 12 from rep0 matched to 5"
[1] "modules with no match 16"
[1] "ZF50 rep5 modules with duplicated matches:"
[1] "5, 12 from rep0 matched to 5"
[1] "modules with no match 16"
[1] "ZF50 rep8 modules with duplicated matches:"
[1] "5, 12 from rep0 matched to 5"
[1] "modules with no match 16"
[1] "ZFS rep3 modules with duplicated matches:"
[1] "13, 16 from rep0 matched to 13"
[1] "modules with no match 22"
[1] "ZFS rep4 modules with duplicated matches:"
[1] "16, 24 from rep0 matched to 16"
[1] "modules with no match 23"
[1] "ZFS rep5 modules with duplicated matches:"
[1] "13, 17 from rep0 matched to 23"
[1] "0, 24 from rep0 matched to 0"
[1] "modules with no match 22, 24"
[1] "ZFS rep6 modules with duplicated matches:"
[1] "17, 21 from rep0 matched to 22"
[1] "22, 24 from rep0 matched to 21"
[1] "modules with no match 10, 20"
[1] "ZFS rep7 modules with duplicated matches:"
[1] "16, 24 from rep0 matched to 20"
[1] "modules with no match 21, 23"
[1] "ZFS rep8 modules with duplicated matches:"
[1] "16, 24 from rep0 matched to 21"
[1] "modules with no match 13"
[1] "ZFS rep9 modules with duplicated matches:"
[1] "15, 19 from rep0 matched to 23"
[1] "16, 22 from rep0 matched to 13"
[1] "modules with no match 10, 24"
[1] "ZF60 rep4 modules with duplicated matches:"
[1] "0, 13 from rep0 matched to 17"
[1] "modules with no match 0"
[1] "ZF60 rep7 modules with duplicated matches:"
[1] "13, 15 from rep0 matched to 13"
[1] "modules with no match 24"
[1] "ZF75 rep1 modules with duplicated matches:"
[1] "13, 14 from rep0 matched to 14"
[1] "modules with no match 13"
[1] "ZF75 rep2 modules with duplicated matches:"
[1] "1, 20 from rep0 matched to 1"
[1] "modules with no match 13"
[1] "ZF75 rep4 modules with duplicated matches:"
[1] "1, 20 from rep0 matched to 1"
[1] "5, 22 from rep0 matched to 5"
[1] "modules with no match 0, 13"
[1] "ZF75 rep5 modules with duplicated matches:"
[1] "13, 14 from rep0 matched to 14"
[1] "1, 20 from rep0 matched to 1"
[1] "modules with no match 0, 13"
[1] "ZF75 rep6 modules with duplicated matches:"
[1] "13, 14 from rep0 matched to 14"
[1] "modules with no match 13"
[1] "ZF75 rep7 modules with duplicated matches:"
[1] "13, 14 from rep0 matched to 14"
[1] "1, 20 from rep0 matched to 1"
[1] "modules with no match 0, 13"
[1] "ZF75 rep8 modules with duplicated matches:"
[1] "13, 14 from rep0 matched to 14"
[1] "5, 22 from rep0 matched to 5"
[1] "modules with no match 13, 21"
[1] "ZF75 rep9 modules with duplicated matches:"
[1] "13, 14 from rep0 matched to 14"
[1] "modules with no match 13"
[1] "ZF90 rep1 modules with duplicated matches:"
[1] "modules with no match 15, 28, 37, 38, 39"
[1] "ZF90 rep2 modules with duplicated matches:"
[1] "5, 14 from rep0 matched to 14"
[1] "modules with no match 11, 23, 24, 37, 39, 44"
[1] "ZF90 rep3 modules with duplicated matches:"
[1] "8, 26 from rep0 matched to 8"
[1] "modules with no match 25, 26, 32, 35, 39, 41, 44"
[1] "ZF90 rep4 modules with duplicated matches:"
[1] "0, 6 from rep0 matched to 0"
[1] "8, 26 from rep0 matched to 8"
[1] "modules with no match 23, 25, 32, 37"
[1] "ZF90 rep5 modules with duplicated matches:"
[1] "5, 14 from rep0 matched to 14"
[1] "8, 26 from rep0 matched to 8"
[1] "modules with no match 26, 36, 37, 44"
[1] "ZF90 rep6 modules with duplicated matches:"
[1] "modules with no match 25, 35, 39, 40, 43, 44"
[1] "ZF90 rep7 modules with duplicated matches:"
[1] "8, 26 from rep0 matched to 8"
[1] "modules with no match 30, 32, 36, 42, 43"
[1] "ZF90 rep8 modules with duplicated matches:"
[1] "5, 14 from rep0 matched to 14"
[1] "modules with no match 34, 36, 37, 38, 40"
[1] "ZF90 rep9 modules with duplicated matches:"
[1] "5, 14 from rep0 matched to 14"
[1] "9, 38 from rep0 matched to 33"
[1] "modules with no match 11, 23, 25, 35, 37, 38, 42"
[1] "ZFB rep1 modules with duplicated matches:"
[1] "26, 38 from rep0 matched to 26"
[1] "modules with no match 25, 29, 34, 37"
[1] "ZFB rep2 modules with duplicated matches:"
[1] "4, 23 from rep0 matched to 4"
[1] "modules with no match 25, 26, 31, 33, 38, 39"
[1] "ZFB rep3 modules with duplicated matches:"
[1] "modules with no match 28, 33, 39"
[1] "ZFB rep4 modules with duplicated matches:"
[1] "4, 23 from rep0 matched to 4"
[1] "12, 24 from rep0 matched to 12"
[1] "modules with no match 26, 32, 33, 34, 38, 39"
[1] "ZFB rep5 modules with duplicated matches:"
[1] "12, 24 from rep0 matched to 12"
[1] "modules with no match 20, 27, 29, 33, 37, 39"
[1] "ZFB rep6 modules with duplicated matches:"
[1] "modules with no match 22, 25, 31, 32, 34"
[1] "ZFB rep7 modules with duplicated matches:"
[1] "4, 23 from rep0 matched to 4"
[1] "modules with no match 25, 29, 35, 39"
[1] "ZFB rep8 modules with duplicated matches:"
[1] "modules with no match 25, 31, 34"
[1] "ZFB rep9 modules with duplicated matches:"
[1] "12, 24 from rep0 matched to 12"
[1] "modules with no match 22, 24, 31, 34, 38"
[1] "ZF3S rep1 modules with duplicated matches:"
[1] "5, 24 from rep0 matched to 5"
[1] "modules with no match 28"
[1] "ZF3S rep2 modules with duplicated matches:"
[1] "17, 27 from rep0 matched to 17"
[1] "modules with no match 29"
[1] "ZF3S rep3 modules with duplicated matches:"
[1] "5, 24 from rep0 matched to 5"
[1] "26, 30 from rep0 matched to 30"
[1] "modules with no match 7, 24"
[1] "ZF3S rep4 modules with duplicated matches:"
[1] "5, 24 from rep0 matched to 5"
[1] "17, 27 from rep0 matched to 17"
[1] "modules with no match 27, 29, 30"
[1] "ZF3S rep5 modules with duplicated matches:"
[1] "5, 24 from rep0 matched to 5"
[1] "modules with no match 25, 29"
[1] "ZF3S rep6 modules with duplicated matches:"
[1] "17, 27 from rep0 matched to 17"
[1] "modules with no match 29, 30"
[1] "ZF3S rep7 modules with duplicated matches:"
[1] "5, 24 from rep0 matched to 5"
[1] "17, 27 from rep0 matched to 17"
[1] "26, 30 from rep0 matched to 24"
[1] "modules with no match 7, 29, 30"
[1] "ZF3S rep9 modules with duplicated matches:"
[1] "5, 24 from rep0 matched to 5"
[1] "modules with no match 7, 30"
[1] "ZF6S rep1 modules with duplicated matches:"
[1] "0, 25 from rep0 matched to 0"
[1] "16, 27 from rep0 matched to 16"
[1] "28, 32 from rep0 matched to 29"
[1] "modules with no match 25, 31, 37, 38, 40, 41"
[1] "ZF6S rep2 modules with duplicated matches:"
[1] "15, 27 from rep0 matched to 27"
[1] "modules with no match 9, 25, 34, 35, 38, 39, 40"
[1] "ZF6S rep3 modules with duplicated matches:"
[1] "0, 25 from rep0 matched to 0"
[1] "16, 27 from rep0 matched to 16"
[1] "9, 34 from rep0 matched to 8"
[1] "modules with no match 25, 30, 33, 39, 41"
[1] "ZF6S rep4 modules with duplicated matches:"
[1] "modules with no match 24, 25, 33, 35, 39, 40"
[1] "ZF6S rep5 modules with duplicated matches:"
[1] "modules with no match 30, 34, 37, 39"
[1] "ZF6S rep6 modules with duplicated matches:"
[1] "7, 21 from rep0 matched to 21"
[1] "modules with no match 30, 34, 35, 36, 38"
[1] "ZF6S rep7 modules with duplicated matches:"
[1] "16, 27 from rep0 matched to 27"
[1] "modules with no match 25, 34, 36, 37, 38, 39"
[1] "ZF6S rep8 modules with duplicated matches:"
[1] "16, 27 from rep0 matched to 16"
[1] "13, 34 from rep0 matched to 13"
[1] "modules with no match 30, 37, 38, 41"
[1] "ZF6S rep9 modules with duplicated matches:"
[1] "16, 27 from rep0 matched to 16"
[1] "13, 34 from rep0 matched to 13"
[1] "modules with no match 33, 34, 35, 37, 40"
Batch modules are found using the BatchGene function in Seurat package. Noise modules are defined as the ones that are primarily driven by a single gene (the top ranked gene has a weight more than 3 times the weight of the second ranked gene).
zf_use = filter.modules(zf_C,zf_G,module_match = module_match, batch.cutoff = 0.715, batch.field = 3) #0.735good
[1] "Stage: ZFHIGH"
[1] "number of batches: 2"
[1] 0.8451613 0.1548387
[1] "Batch modules:"
[1] "8" "5" "0" "1"
[1] "Stage: ZFOBLONG"
[1] "number of batches: 2"
[1] 0.361809 0.638191
[1] "Batch modules:"
[1] "2" "9"
[1] "Stage: ZFDOME"
[1] "number of batches: 1"
[1] 1
[1] "Batch modules:"
NULL
[1] "Stage: ZF30"
[1] "number of batches: 2"
[1] 0.2974768 0.7025232
[1] "Batch modules:"
[1] "3" "5"
[1] "Stage: ZF50"
[1] "number of batches: 4"
[1] 0.2195122 0.2439024 0.2672693 0.2693160
[1] "Batch modules:"
[1] "3" "4" "10" "16" "9" "7" "0"
[1] "Stage: ZFS"
[1] "number of batches: 1"
[1] 1
[1] "Batch modules:"
NULL
[1] "Stage: ZF60"
[1] "number of batches: 3"
[1] 0.3470602 0.3034992 0.3494406
[1] "Batch modules:"
[1] "5" "15" "13" "17"
[1] "Noise modules:"
[1] "23"
[1] "Stage: ZF75"
[1] "number of batches: 3"
[1] 0.3149905 0.4685326 0.2164769
[1] "Batch modules:"
[1] "4" "23" "21" "16"
[1] "Noise modules:"
[1] "0"
[1] "Stage: ZF90"
[1] "number of batches: 3"
[1] 0.2200498 0.5063989 0.2735514
[1] "Batch modules:"
[1] "24" "9" "23"
[1] "Noise modules:"
[1] "6" "29" "31" "33" "34" "35" "36" "37" "38" "39" "40" "41" "43" "44"
[1] "Stage: ZFB"
[1] "number of batches: 2"
[1] 0.03492596 0.07069014 0.66429170 0.23009220
[1] "Batch modules:"
[1] "15"
[1] "Noise modules:"
[1] "20" "26" "27" "28" "30" "32" "34" "35" "36" "37" "38" "39"
[1] "Stage: ZF3S"
[1] "number of batches: 1"
[1] 1
[1] "Batch modules:"
NULL
[1] "Noise modules:"
[1] "28" "29"
[1] "Stage: ZF6S"
[1] "number of batches: 2"
[1] 0.5317763 0.4682237
[1] "Batch modules:"
[1] "25" "8"
[1] "Noise modules:"
[1] "24" "28" "30" "31" "32" "33" "36" "38" "39" "41"
for(stage in stages){
print(stage)
#print(dim(DS_C_use[[stage]]))
print(dim(zf_use[["G"]][[stage]]))
}
[1] "ZFHIGH"
[1] 1262 6
[1] "ZFOBLONG"
[1] 1166 9
[1] "ZFDOME"
[1] 1583 17
[1] "ZF30"
[1] 1553 13
[1] "ZF50"
[1] 1721 13
[1] "ZFS"
[1] 1573 25
[1] "ZF60"
[1] 1749 20
[1] "ZF75"
[1] 1809 19
[1] "ZF90"
[1] 1833 28
[1] "ZFB"
[1] 1856 27
[1] "ZF3S"
[1] 1825 29
[1] "ZF6S"
[1] 1854 30
For each stage, calculate the coefficient of variance (CV) and a ‘specificity score’ for each gene’s weight in each module across the 10 replications. To calculate the CV of a gene in a particular module, we first extract that module from each of the 10 NMF runs (modules are matched by correlation of gene weigths). Then the CV of a gene in this module is defined as the mean of its weights divided by the standard deviation of its weights across these 10 replications. The specificity score of a gene for a module is calculated as the weight of the gene in that module divided by the sum of the gene’s weights in all modules from the same run. If a gene is a ‘robust’ marker of the module, it should have a small CV and a high specificity. We then plot these statistics of the top ranking genes in each module to see when CV becomes high and specificity becomes low, indicating the genes ranked higher than that are good genes to use in the overlap score calculations.
weight_cv=list()
for(stage in names(zf_use$match)){
#num_gen=min(unlist(lapply(zf_use$match[[stage]], function(x) dim(x)[1])))
num_gen=40
weight_cv[[stage]]=matrix(nrow=num_gen, ncol = length(zf_use$match[[stage]]))
colnames(weight_cv[[stage]])=names(zf_use$match[[stage]])
for(m in names(zf_use$match[[stage]])){
num_val = min(num_gen,dim(zf_use$match[[stage]][[m]])[1])
weight_cv[[stage]][1:num_val,m]=apply(zf_use$match[[stage]][[m]][1:num_val,], 1, function(x) sqrt(var(x,na.rm = T))/mean(x, na.rm=T))
}
}
weight_spec=list()
for(stage in names(zf_use$match)){
weight_spec[[stage]]=zf_use$G[[stage]]
rownames(weight_spec[[stage]])=1:dim(zf_use$G[[stage]])[1]
weight_spec[[stage]]=sweep(weight_spec[[stage]],1,apply(zf_use$G[[stage]],1,sum),"/")
for(i in 1:dim(zf_use$G[[stage]])[2]){
ind_i=order(zf_use$G[[stage]][,i],decreasing = T)
weight_spec[[stage]][,i]=weight_spec[[stage]][ind_i,i]
}
}
par(mfrow=c(2,2))
for(stage in names(weight_cv)){
plot(1:dim(weight_cv[[stage]])[1],apply(weight_cv[[stage]],1,function(x) mean(x,na.rm=T)),main=stage, ylab="Mean CV",xlab="Rank")
plot(1:40,apply(weight_spec[[stage]][1:40,],1,function(x) mean(x,na.rm=T)),main=stage, ylab="Mean Specificity",xlab="Rank")
plot(1:dim(weight_cv[[stage]])[1],apply(weight_cv[[stage]],1,function(x) median(x,na.rm=T)),main=stage, ylab="Median CV",xlab="Rank")
plot(1:40,apply(weight_spec[[stage]][1:40,],1,function(x) median(x,na.rm=T)),main=stage, ylab="Median Specificity",xlab="Rank")
}
We decided to use the top 25 genes in each module in this calculation. The overlap of two modules is calculated as the sum of the weights of shared genes divided by the sum of weights of all genes. The results of the overlap scores are visualized in heat maps.
If a stage was not deeply or comprehensively sampled and sequenced, we might not be able to recover certain modules from that stage. This could potentially create dis-connections in the module lineages. In order to produce continuous module lineages when there is potential occasional drop-out of modules, we allow modules separated by one stage to connect to each other when connection to immediate neighbouring stage is not found.
For each module, find its most overlaped module in each of the two previous stages. Only modules with >22.5% overlaps are taken into account.
## for each module at one stage, want to find max correlated one in the two previous stages
connect_module <- function(thres1=0.15, thres2=0.25,G_cor_use,G_cor_use2){
G_connect <- list()
for(i in 1:(length(stages)-1)){
stage=stages[i]
stage_next=stages[i+1]
G_cor_stage=G_cor_use[[stage]]
Max_pre=apply(G_cor_stage,1,order)
Max_pre_ind=Max_pre[dim(Max_pre)[1],]
Max_pre_M=colnames(G_cor_stage)[Max_pre_ind]
Max_value=apply(G_cor_stage,1,max)
has_pre_ind=which(Max_value>thres1)
has_pre_M=rownames(G_cor_stage)[has_pre_ind]
if(i==1){
G_connect[[stage_next]]=data.frame(matrix(NA, nrow = 1, ncol = dim(G_cor_stage)[1]),row.names=stage)
colnames(G_connect[[stage_next]])=rownames(G_cor_stage)
G_connect[[stage_next]][,has_pre_M]=Max_pre_M[has_pre_ind]
G_connect[[stage_next]]=G_connect[[stage_next]][,has_pre_M]
}else{
stage_pre=stages[i-1]
G_cor_stage2=G_cor_use2[[stage_pre]]
all_M=union(rownames(G_cor_stage2),rownames(G_cor_stage))
G_connect[[stage_next]]=data.frame(matrix(NA, nrow = 2, ncol = length(all_M)),row.names=c(stage,stage_pre))
colnames(G_connect[[stage_next]])=all_M
G_connect[[stage_next]][1,has_pre_M]=Max_pre_M[has_pre_ind]
G_cor_stage=G_cor_use2[[stage_pre]]
Max_pre=apply(G_cor_stage,1,order)
Max_pre_ind=Max_pre[dim(Max_pre)[1],]
Max_pre_M=colnames(G_cor_stage)[Max_pre_ind]
Max_value=apply(G_cor_stage,1,max)
has_pre_ind=which(Max_value>thres2)
has_pre_M2=rownames(G_cor_stage)[has_pre_ind]
G_connect[[stage_next]][2,has_pre_M2]=Max_pre_M[has_pre_ind]
G_connect[[stage_next]]=G_connect[[stage_next]][,union(has_pre_M,has_pre_M2)]
}
}
return(G_connect)
}
G_int_connect=connect_module(G_cor_use = G_int, G_cor_use2 = G_int2, thres1 = 0.2,thres2 = 0.2)
We start from modules in the oldest stage (6-somites). Each module is first connected to its most overlaped module in the immediate previous stage. If no potential connection is recorded (in G_int_connect) for the immediate previous stage, it will then be connected to the module recorded for the stage earlier (if there is one). When the overlap between a module and its most overlapped module in the immediate previous stage is less than 35%, and at the same time it has more than 50% overlap with its most overlapped module two stages earlier, we then directly connect this module to the more previous module, and cut its connection to the one in the immidiate previous stage (this case didn’t occur here).
build_netM <- function(G_connect,G_cor_use,G_cor_use2,thres=NULL,thres_pre=NULL){
nodes_names=c()
for(i in 1:(length(stages)-1)){
stage=stages[i+1]
G_ans=G_connect[[stage]]
nodes_names=union(nodes_names,paste0(stage,'_',colnames(G_ans)))
nodes_names=union(nodes_names,paste0(stages[i],"_",G_ans[stages[i],which(!is.na(G_ans[stages[i],]))]))
if(i>1){
nodes_names=union(nodes_names,paste0(stages[i-1],"_",G_ans[stages[i-1],which(!is.na(G_ans[stages[i-1],]))]))
}
}
num_nodes=length(nodes_names)
net_M=matrix(0,ncol = num_nodes,nrow = num_nodes)
rownames(net_M)=nodes_names
colnames(net_M)=nodes_names
for(i in 1:(length(stages)-1)){
stage_pre=stages[i]
stage=stages[i+1]
G_ans=G_connect[[stage]]
for(j in colnames(G_ans)){
to_name=paste0(stage,'_',j)
if(!is.na(G_ans[stage_pre,j])){
from_M=G_ans[stage_pre,j]
from_name=paste0(stage_pre,'_',from_M)
##get the correlation score to put in the connection matirx
net_M[from_name,to_name]=G_cor_use[[stage_pre]][j,from_M]
}
if(i!=1){
stage_pre2=stages[i-1]
if(!is.na(G_ans[stage_pre2,j])){
from_M2=G_ans[stage_pre2,j]
from_name2=paste0(stage_pre2,"_",from_M2)
if(is.na(G_ans[stage_pre,j])){
net_M[from_name2,to_name]=G_cor_use2[[stage_pre2]][j,from_M2]
}else if(!is.null(thres)){
G_cor=G_cor_use[[stage_pre]][j,from_M]
G_cor_pre=G_cor_use2[[stage_pre2]][j,from_M2]
if(G_cor<thres && G_cor_pre>thres_pre){
print(paste0("add ",from_name2," to ",to_name))
net_M[from_name2,to_name]=G_cor_use2[[stage_pre2]][j,from_M2]
print(paste0("delete ", from_name," to ",to_name))
net_M[from_name,to_name]=0
}
}
}
}
}
}
return(net_M)
}
net_int=build_netM(G_int_connect, G_int, G_int2, thres = 0.35, thres_pre = 0.5)
zf_top$ZF60
zf_top$ZF75
zf_top$ZF90
path_score=calc_path_qual(net_int)
hist(path_score,breaks = 50, main="average weighted overlap")
Most of the path with <0.44 average overlap were short or consist of either ubiquitous or lowly expressed genes.
end_nodes_bad=names(path_score[path_score<0.44])
for(node in end_nodes_bad){
#print(get_upstream(net_int,node))
bad_path=get_upstream(net_int,node)
bad_tbl=matrix(nrow=20,ncol=length(bad_path))
colnames(bad_tbl)=bad_path
for(bad_node in bad_path){
stage=unlist(strsplit(bad_node,"_"))[1]
m=unlist(strsplit(bad_node,"_"))[2]
bad_tbl[,bad_node]=as.character(zf_top[[stage]][1:20,paste0("Module.",m)])
}
print(bad_tbl)
}
ZFOBLONG_7 ZFHIGH_2
[1,] "MID1IP1A" "MID1IP1A"
[2,] "ILDR1A" "STARD14"
[3,] "TDGF1" "KRT18"
[4,] "PPDPFA" "KLF17"
[5,] "CRELD2" "GADD45BB"
[6,] "FTR83" "CCND1"
[7,] "CDKN1CA" "CDKN1CA"
[8,] "EF1" "BTG1"
[9,] "GPR160" "GADD45BA"
[10,] "ZNHIT6" "MPLKIP"
[11,] "KLF17" "LRWD1"
[12,] "CLDNE" "NET1"
[13,] "FOXI1" "CLDNE"
[14,] "SI:CH73-1A9.3" "ARL13A"
[15,] "GPAT2" "GRHL3"
[16,] "ACTB1" "MARCKSL1B"
[17,] "ATP1B3A" "ACKR3B"
[18,] "ELOVL1B" "SOX19A"
[19,] "GNMT" "IRX7"
[20,] "SMPD5" "RASSF7B"
ZFDOME_11 ZFOBLONG_3
[1,] "CST3" "ALDOB"
[2,] "CD9B" "ASB11"
[3,] "ATP1B1A" "ID1"
[4,] "LRWD1" "APOEB"
[5,] "FAM212AA" "HSPB1"
[6,] "FAM46BA" "SI:CH1073-80I24.3"
[7,] "CABZ01070258.1" "TBX16"
[8,] "APELA" "MALAT1"
[9,] "CXCR4A" "ZGC:110425"
[10,] "CX43.4" "FAM212AA"
[11,] "ABRACL" "AKAP12B"
[12,] "SSR3" "SI:CH211-152C2.3"
[13,] "TDGF1" "NNR"
[14,] "CXCR4B" "CXCR4B"
[15,] "CYP26A1" "SI:CH73-1A9.3"
[16,] "LRATB" "CITED4B"
[17,] "H2AFX" "SI:DKEY-228B2.6"
[18,] "ETV4" "POLR3GLA"
[19,] "ID1" "CXCR4A"
[20,] "ZIC2B" "CABZ01070258.1"
ZFS_10 ZF50_18 ZF30_12 ZFDOME_8 ZFHIGH_3
[1,] "EPCAM" "BLF" "SI:CH1073-190K2.1" "H2AFX" "PLK1"
[2,] "NNR" "SI:CH211-133N4.4" "ACP5A" "CTH1" "CCNG1"
[3,] "CST3" "SI:CH73-52F24.4" "THY1" "THY1" "CTSBA"
[4,] "NPC2" "CA9" "CCNA1" "CCNA1" "F11R.1"
[5,] "H1M" "OCLNA" "NPC2" "CTSBA" "NUSAP1"
[6,] "CAPNS1A" "MCM6" "OCLNA" "SNAI1A" "TMEM263"
[7,] "CXCR4B" "F11R.1" "ZGC:113886" "CCNG1" "ANKRD12"
[8,] "DNAJB1B" "EPCAM" "MIBP" "MPLKIP" "CCNA1"
[9,] "OCLNA" "TIFA" "SPINT2" "BLF" "RNF128A"
[10,] "PDIA6" "SI:CH211-86H15.1" "FOPNL" "SI:CH211-173M16.2" "CTH1"
[11,] "SI:CH73-299H12.3" "ZGC:92818" "TUBB4B" "ZP3.2" "CALR3B"
[12,] "LRWD1" "GDF3" "GLULB" "BTG2" "RBM38"
[13,] "SI:DKEY-68O6.5" "NPC2" "SI:CH211-173M16.2" "SEC61G" "SI:CH211-269I23.2"
[14,] "F11R.1" "CCNA1" "SLC16A3" "HSPA5" "NPC2"
[15,] "ATP1B1A" "HMGB1A" "CTSS2.1" "SI:CH73-52F24.4" "CDC14B"
[16,] "EFHD1" "CD9B" "BLF" "NPC2" "SI:CH211-199G17.2"
[17,] "RRM2" "SALL4" "F11R.1" "ZGC:158852" "COQ10B"
[18,] "FBXO5" "SI:CH211-137A8.4" "SHISA2" "DDIT4" "BTG2"
[19,] "HIST2H2AB" "QKIA" "CTH1" "NDUFA4L" "PRRG2"
[20,] "CDCA7A" "ZFAND5A" "NNR" "SEPH" "GSTP1"
ZFS_0 ZF30_14 ZFDOME_14 ZFOBLONG_3
[1,] "SI:CH1073-80I24.3" "SI:CH73-281N10.2" "KPNA2" "ALDOB"
[2,] "HMGB1A" "ID1" "FBXO5" "ASB11"
[3,] "NDUFA4L" "HSPB1" "INCENP" "ID1"
[4,] "DYNLL2A" "SEC61G" "MAD2L1" "APOEB"
[5,] "SEPH" "CXCR4B" "SOX3" "HSPB1"
[6,] "HIST2H2AB" "SOX3" "PFKFB4B" "SI:CH1073-80I24.3"
[7,] "RPS20" "VED" "APOEB" "TBX16"
[8,] "CFL1" "RPS12" "VED" "MALAT1"
[9,] "STMN1A" "ALDOB" "POLR3GLA" "ZGC:110425"
[10,] "ZGC:92818" "NNR" "VOX" "FAM212AA"
[11,] "RPS18" "ANP32E" "ASB11" "AKAP12B"
[12,] "RPS12" "ASB11" "SEPT12" "SI:CH211-152C2.3"
[13,] "CDCA7A" "MARCKSL1B" "HSPB1" "NNR"
[14,] "SI:CH211-152C2.3" "SI:DKEY-151G10.6" "SI:CH211-133N4.4" "CXCR4B"
[15,] "SI:CH211-222L21.1" "SI:CH73-1A9.3" "SI:CH211-152C2.3" "SI:CH73-1A9.3"
[16,] "SI:CH73-281N10.2" "DYNLL1" "SOX19A" "CITED4B"
[17,] "LRWD1" "SI:CH211-152C2.3" "SI:CH211-222L21.1" "SI:DKEY-228B2.6"
[18,] "HSPB1" "VOX" "STM" "POLR3GLA"
[19,] "SI:DKEY-151G10.6" "SI:CH211-222L21.1" "MALAT1" "CXCR4A"
[20,] "TUBA8L4" "APOEB" "TUBB4B" "CABZ01070258.1"
ZFS_16 ZF30_7
[1,] "SRSF10B" "TOP2A"
[2,] "CTSBA" "KPNA2"
[3,] "SSR4" "BIRC5A"
[4,] "BIRC5A" "UBTF"
[5,] "FOXD5" "CENPF"
[6,] "HMGB1A" "ASPM"
[7,] "ALCAMB" "LRWD1"
[8,] "SDC4" "UBE2C"
[9,] "SSR3" "PHO"
[10,] "SLC37A2" "ANKRD11"
[11,] "GAPDH" "TPX2"
[12,] "SI:DKEY-27I16.2" "PLK1"
[13,] "YWHAQA" "INCENP"
[14,] "EIF3S10" "FBXO5"
[15,] "SNRNP40" "ZC3H15"
[16,] "DNAJB1B" "ZNFL2A"
[17,] "SEPT12" "ZEB1A"
[18,] "DNTTIP2" "DNTTIP2"
[19,] "ZC3H15" "BCL2L12"
[20,] "NUSAP1" "SEPT12"
ZF75_12 ZF60_12 ZFS_8 ZF50_15 ZF30_8 ZFDOME_5 ZFOBLONG_4
[1,] "SI:CH1073-80I24.3" "SI:CH1073-80I24.3" "VED" "VED" "APOC1" "VED" "CST3"
[2,] "POLR3GLA" "NNR" "ID1" "APOC1" "VED" "BAMBIA" "NOTO"
[3,] "NNR" "CXCR4B" "APOEB" "BAMBIA" "APOEB" "NOTO" "TBX16"
[4,] "LRWD1" "SI:DKEY-228B2.6" "BAMBIA" "APOEB" "BAMBIA" "ARL5C" "SI:DKEY-228B2.6"
[5,] "ADD3B" "SOX3" "APOC1" "ID1" "CALM3A" "VOX" "CMTM6"
[6,] "EIF4EBP3L" "SI:CH211-173M16.2" "CITED4B" "VOX" "TUBB4B" "BMP2B" "BAIAP2L1A"
[7,] "DYNLL1" "ASB11" "SOX19A" "BMP7A" "SI:CH1073-80I24.3" "SI:DKEY-261J4.3" "VOX"
[8,] "ASB11" "LRWD1" "SOX3" "RGCC" "RGCC" "APOC1" "FAM3C"
[9,] "ALDOB" "GLULB" "VOX" "SOX3" "H2AFX" "APOEB" "ANKRD11"
[10,] "FKBP7" "SI:DKEY-27I16.2" "SI:CH1073-80I24.3" "CXCR4B" "CTSBA" "UBTF" "AKAP12B"
[11,] "MARCKSL1B" "FBXO5" "CXCR4B" "SOX19A" "MID1IP1B" "BIRC5A" "SOX11A"
[12,] "GLULB" "ZIC2B" "FOXI1" "GATA2A" "NUSAP1" "SI:DKEY-261J4.5" "HSD17B12A"
[13,] "SOX19A" "SI:DKEY-68O6.5" "RGCC" "CITED4B" "VOX" "CHD4B" "FSCN1A"
[14,] "MPLKIP" "SOX19A" "ID3" "BMP4" "GAPDH" "SI:CH1073-80I24.3" "LFT2"
[15,] "ZIC2B" "POLR3GLA" "POLR3GLA" "ID2A" "TP53INP2" "RPL38" "CALR"
[16,] "CABZ01070258.1" "ID1" "MID1IP1B" "RASL11B" "GLULB" "TBX16" "TP53INP2"
[17,] "SOX11A" "MPLKIP" "ZIC2B" "CRABP2B" "BCAS2" "TIFA" "STM"
[18,] "CDCA7A" "H2AFX" "FAM212AB" "CXCL12A" "SI:CH211-173M16.2" "CMTM6" "PDAP1B"
[19,] "CITED4B" "SOX2" "ALDOB" "CXCR4A" "ID3" "STM" "ZGC:110425"
[20,] "CFL1L" "DNAJB1B" "ASB11" "NNR" "HER7" "RPS20" "EFHD1"
ZFHIGH_6
[1,] "NOTO"
[2,] "WNT11"
[3,] "GSC"
[4,] "FOXD3"
[5,] "SDF4"
[6,] "EZRB"
[7,] "STARD14"
[8,] "FOXA"
[9,] "TBX16"
[10,] "SOX11A"
[11,] "RASGEF1BA"
[12,] "PARD6GB"
[13,] "CEBPB"
[14,] "NOG1"
[15,] "PLK3"
[16,] "SI:DKEY-108K21.14"
[17,] "BTG1"
[18,] "ZGC:92066"
[19,] "TOLLIP"
[20,] "TSPAN15"
ZF3S_25 ZF90_19 ZF60_22 ZFS_13 ZF30_7
[1,] "ZC3H13" "HELLS" "PTMAA" "ANKRD11" "TOP2A"
[2,] "GUSB" "CDCA7A" "HELLS" "ZC3H13" "KPNA2"
[3,] "SSR3" "MCM6" "GOLGB1" "SALL4" "BIRC5A"
[4,] "PNN" "CASP8AP2" "CASP8AP2" "ASPH" "UBTF"
[5,] "DENR" "NASP" "CEP250" "INCENP" "CENPF"
[6,] "MYL12.1" "NAP1L1" "ANKRD11" "BAZ1B" "ASPM"
[7,] "EIF3S10" "SEPH" "MCM6" "PLK1" "LRWD1"
[8,] "ANP32E" "UBTF" "BAZ1B" "CHD4B" "UBE2C"
[9,] "UBTF" "LIG1" "SI:DKEY-56M19.5" "MKI67" "PHO"
[10,] "PTMAA" "ZC3H13" "PPRC1" "CENPF" "ANKRD11"
[11,] "NOP56" "HMGB1A" "ZC3H13" "PNN" "TPX2"
[12,] "OST4" "BAZ1B" "CHD4B" "PHO" "PLK1"
[13,] "SI:DKEY-238C7.12" "PPRC1" "NAT8L" "ZGC:110425" "INCENP"
[14,] "KDELR2B" "PCNA" "MKI67" "PPIG" "FBXO5"
[15,] "SALL4" "PTMAA" "ATP1B1A" "HELLS" "ZC3H15"
[16,] "GLULB" "PNN" "DNTTIP2" "NUSAP1" "ZNFL2A"
[17,] "CHD4B" "HIRIP3" "TUBA1A" "RRP1" "ZEB1A"
[18,] "PHO" "MARCKSL1A" "ZC3H15" "ACIN1A" "DNTTIP2"
[19,] "LRRC59" "SI:DKEY-56M19.5" "LRWD1" "PPRC1" "BCL2L12"
[20,] "ACIN1A" "NOP56" "MALAT1" "CASP8AP2" "SEPT12"
end_nodes_good=names(path_score[path_score>=0.44])
all_nodes_good=c()
for(node in end_nodes_good){
all_nodes_good=c(all_nodes_good,get_upstream(net_int,node))
}
all_nodes_good=unique(all_nodes_good)
net_int_good=net_int[all_nodes_good,all_nodes_good]
draw.net(net_int_good, circular = F,label.size = 0.37)
all_end_nodes=rownames(net_int_good)[which(apply(net_int_good,1,sum)==0)]
all_lineages<-list()
for(end_node in all_end_nodes){
up_nodes=get_upstream(net_int_good,end_node)
path_tbl=data.frame(matrix(nrow=25,ncol=2*length(up_nodes)),stringsAsFactors = F)
name_col=rep(up_nodes,each=2)
name_col[c(1:length(up_nodes))*2]=paste0(name_col[c(1:length(up_nodes))*2],"_Weight")
colnames(path_tbl)=name_col
for(node in up_nodes){
stage=unlist(strsplit(node,"_"))[1]
m=unlist(strsplit(node,"_"))[2]
path_tbl[,node]=as.character(zf_top[[stage]][1:25,paste0("Module.",m)])
path_tbl[,paste0(node,"_Weight")]=(zf_top[[stage]][1:25,paste0("Weights.",m)])
}
all_lineages[[end_node]]=path_tbl
}
#save(all_lineages,file="./Module_tree/module_lineages.Robj")
length(all_lineages)
[1] 40
for(lineage in names(all_lineages)){
if(dim(all_lineages[[lineage]])[2]>6){
print(all_lineages[[lineage]])
}
}
lineage_exp <- function(DS_C_use, DS_G_use, net_int_good, all_lineages, all_end_nodes=NULL){
if(is.null(all_end_nodes)){
all_end_nodes=rownames(net_int_good)[which(apply(net_int_good,1,sum)==0)]
}
stages=names(DS_C_use)
all_cells=c()
all_genes=c()
for(stage in stages){
C_use=DS_C_use[[stage]]
all_cells=c(all_cells,colnames(C_use))
G_use=DS_G_use[[stage]]
all_genes=c(all_genes,rownames(G_use))
}
all_genes=unique(all_genes)
all_Ms=rownames(net_int_good)
allM_allCell=data.frame(matrix(0,ncol = length(all_cells),nrow = length(all_Ms)),row.names = all_Ms)
allGene_allM=data.frame(matrix(0,ncol = length(all_Ms),nrow = length(all_genes)),row.names = all_genes)
colnames(allM_allCell)=all_cells
colnames(allGene_allM)=all_Ms
## look stage by stage, fill in the expression matrix with MAX NORMALIZED gene module expression
for(stage in stages){
G_use=DS_G_use[[stage]]
G.max=apply(G_use, 2, max)
G_norm=sweep(G_use, 2, G.max, '/') ## now each module's top gene has weight 1
colnames(G_norm)=paste0(stage,"_",colnames(G_norm))
M_use=intersect(colnames(G_norm),all_Ms)
C_use=DS_C_use[[stage]]
C.max=apply(C_use, 1, max)
C_norm=sweep(C_use,1,C.max,'/')
rownames(C_norm)=paste0(stage,"_",rownames(C_norm))
if(length(M_use)>0){
## fill in gene matrix
allGene_allM[rownames(G_norm),M_use]=G_norm[rownames(G_norm),M_use]
## fill in cell matrix
allM_allCell[M_use,colnames(C_use)]=C_norm[M_use,colnames(C_use)]
}
}
lineage_cell=data.frame(matrix(0,ncol = length(all_cells),nrow = length(all_end_nodes)),row.names = all_end_nodes, stringsAsFactors = F)
colnames(lineage_cell)=all_cells
# matrix to use: allM_allCell
for(lin in all_end_nodes){
lin_M=colnames(all_lineages[[lin]])[c(T,F)]
if(length(setdiff(lin_M,all_Ms))==0){
## sum up and add
lineage_cell[lin,]=apply(allM_allCell[lin_M,colnames(lineage_cell)],2,sum)
}else{
print(paste(lin,"has module(s) that are not in the table"))
}
}
return(list(lineageXcell=lineage_cell, allMXallCell=allM_allCell, allGeneXallM=allGene_allM))
}
lineage_module_list=lineage_exp(zf_use$C, zf_use$G, net_int_good, all_lineages, all_end_nodes=all_end_nodes)
lineage_cell=lineage_module_list$lineageXcell
allM_allCell=lineage_module_list$allMXallCell
allGene_allM=lineage_module_list$allGeneXallM
clan_coord <- function(net_int_good, node_start, y.names=stages){
y=rev(1:length(y.names))
names(y)=y.names
nodes_in_clan <- get_downstream(net_int_good,node_start)
clan_net <- net_int_good[nodes_in_clan,nodes_in_clan]
end_nodes <- rownames(clan_net)[which(apply(clan_net,1,sum)==0)]
coords=matrix(nrow=length(nodes_in_clan),ncol = 2)
rownames(coords)=nodes_in_clan
colnames(coords)=c("x","y")
m.stages=unlist(lapply(nodes_in_clan, function(x) unlist(strsplit(x,"_"))[1]))
ys=y[m.stages]
coords[,"y"]=ys
if(length(end_nodes)==1){
xs=rep(0, length(nodes_in_clan))
names(xs)=nodes_in_clan
coords[,"x"]=xs[rownames(coords)]
return(coords)
}else{
up_nodes <- list()
for(node in end_nodes){
up_nodes[[node]]=get_upstream(clan_net,node)
}
end_i=end_nodes[1]
end.nodes_added <- c(end_i)
while(length(setdiff(end_nodes,end.nodes_added))>0){
num.comm = unlist(lapply(up_nodes[setdiff(end_nodes,end.nodes_added)], function(x) length(intersect(up_nodes[[node_start]],x))))
end_i=names(which.max(num.comm))
end.nodes_added <- c(end.nodes_added,end_i)
}
end.nodes_xs=1:length(end_nodes)
names(end.nodes_xs)=end.nodes_added
num_branch <- apply(clan_net>0,1,sum)
branch_nodes = names(num_branch)[which(num_branch>1)]
if(!node_start%in%branch_nodes){
branch_nodes=c(branch_nodes,node_start)
}
branch.nodes_xs=c()
for(node in branch_nodes){
branch_ends=intersect(get_downstream(clan_net,node),end_nodes)
branch.nodes_xs=c(branch.nodes_xs, mean(end.nodes_xs[branch_ends]))
}
names(branch.nodes_xs)=branch_nodes
for(node in branch_nodes){
up_nodes[[node]]=get_upstream(clan_net,node)
}
anchor.nodes_xs=c(end.nodes_xs,branch.nodes_xs)
nodes.added=c()
xs.all=c()
for(node in names(up_nodes)){
branch_up=intersect(up_nodes[[node]],branch_nodes)
seg_nodes=setdiff(up_nodes[[node]],unique(unlist(up_nodes[setdiff(branch_up,node)])))
xs.all=c(xs.all,rep(as.numeric(anchor.nodes_xs[node]),length(seg_nodes)))
nodes.added=c(nodes.added,seg_nodes)
}
names(xs.all)=nodes.added
xs.all[names(branch.nodes_xs)]=branch.nodes_xs
coords[,"x"]=xs.all[rownames(coords)]
return(coords)
}
}
start_nodes <- colnames(net_int_good)[which(apply(net_int_good,2,sum)==0)]
all_sub_coords=list()
num_modules=c()
for(node in start_nodes){
all_sub_coords[[node]]=clan_coord(net_int_good,node)
num_modules=c(num_modules,length(get_downstream(net_int_good,node)))
}
names(num_modules)=start_nodes
ordered_modules=names(sort(num_modules,decreasing = T))
combined_coords=all_sub_coords[[ordered_modules[1]]]
for(lin_ind in 2:length(ordered_modules)){
base_coord=max(combined_coords[,"x"])+1
coord2bind=all_sub_coords[[ordered_modules[lin_ind]]]
coord2bind[,"x"]=coord2bind[,"x"]+base_coord
combined_coords=rbind(combined_coords,coord2bind)
}
library(igraph)
g <- graph.adjacency(net_int_good>0)
edge_list=get.edgelist(g)
M.tree <- list(geneXmodule=allGene_allM, moduleXcell=allM_allCell, lineageXcell=lineage_cell, net_adj=net_int_good, coords=combined_coords, edge_list=edge_list, lineage_ident=all_lineages, top_genes=zf_top, ordered_stages=stages, roots=start_nodes, tips=all_end_nodes)
#saveRDS(M.tree, "./ModuleTree201809.rds")
plot_MTree("NOTO",M.tree)
plot_MTree("FOSAB",M.tree)